/*
Robustness checks
- Panel A: 60 day window
- Panel B: Generic Zocor Entry Event
- Panel C: Indexing Time by Patient Experience
- Panel D: Ever Took New Drug
*/



/*
Panel A:
- Create 60-day window experimental groups
- Re-run core analysis
*/

// ---------------------------------------------------------------------------
// Panel A, step 1: build 60-day-window treatment / control indicators.
//
// For every entry event i, a user is
//   treat_alt`i'   = 1 if first use falls 0..60 days AFTER the entry date
//   control_alt`i' = 1 if first use falls 1..60 days BEFORE the entry date
// (missing otherwise). The wide file is reshaped to one row per
// user x entry event, and exactly one qualifying event is kept per user.
//
// Output: user_treatment_indicators_cholesterol_alt_window.dta
// ---------------------------------------------------------------------------
clear
use user_first_cholesterol_dates

rename first_use_date first_date

// constant key so that the entry-event file can be attached to every user
gen indic_id = 5

merge m:1 indic_id using entry_dates_cholesterol_fixed
keep if _merge == 3 // should all merge
drop _merge

// max_seq bounds the loop over entry events
// NOTE(review): presumably the number of entry events in the wide file -- confirm
summ max_seq
local loop_end = r(max)

forvalues i=1/`loop_end' {
	// days between the user's first use and entry event i
	gen diff = first_date - entry_date`i'

	// a missing diff satisfies neither condition, so both indicators stay missing
	gen treat_alt`i' = 1 if diff >= 0 & diff <= 60
	gen control_alt`i' = 1 if diff < 0 & diff >= -60

	tab treat_alt`i'
	tab control_alt`i'
	drop diff
}

// in the form user id, indication, entering drug characteristics, treatment/control
display _N

keep enrolid treat* control* generic_id* generic* xr* entry_date*

// reshape to long, drop irrelevant entries
reshape long treat_alt control_alt generic_id generic xr entry_date, i(enrolid) j(entry_num)

drop if treat_alt == . & control_alt == . // drop a bunch of entries

// indic_id was removed by the -keep- above; restore it as a join key
gen indic_id = 5

// summarize: how many users qualify for more than one entry event?
duplicates tag enrolid, gen(t)
tab t
drop t

// Keep one randomly chosen entry event per user.
// FIX: the draw is now seeded so the saved file is reproducible, and the
// selection uses -bysort ... (k entry_num)- instead of -sort- followed by
// -egen tag()-. -egen tag()- re-sorts the data internally (without -stable-),
// so it does not necessarily tag the observation with the smallest k.
set seed 12345
gen double k = runiform()
bysort enrolid (k entry_num): keep if _n == 1

save user_treatment_indicators_cholesterol_alt_window, replace





// ---------------------------------------------------------------------------
// Panel A, step 2: re-run the core IV analysis with the 60-day window groups.
//
// Reads : user_quarter_choice_cholesterol, generic_name_mapping,
//         user_treatment_indicators_cholesterol_alt_window
// Writes: table_A2_A.tex
//
// FIX: the joined file carries treat_alt / control_alt, not treat / control.
// The original code referred to -treat- and -control-, which only resolved
// through variable abbreviation and became ambiguous as soon as
// treat_generic was created. The 60-day indicators are now named explicitly.
// ---------------------------------------------------------------------------
set more off
clear
use user_quarter_choice_cholesterol


// focus on cholesterol
tab indic_id
keep if indic_id == 5



// BASIC: LOOK AT IMPACT OF FIRST CHOICE ON LATER CHOICE
// first_q = the user's earliest quarter in the data; the *_temp variables are
// non-missing only in that quarter and are then spread to all of the user's rows
egen first_q = min(quarter), by(enrolid)
gen first_choice_temp = generic_id if quarter == first_q
egen first_choice = max(first_choice_temp), by(enrolid)

gen first_generic_temp = generic if quarter == first_q
egen first_generic = max(first_generic_temp), by(enrolid)

gen first_xr_temp = xr if quarter == first_q
egen first_xr = max(first_xr_temp), by(enrolid)


// quarters elapsed since the user's first quarter
gen period = quarter - first_q
// 1 when the current product matches the first-quarter product on all three attributes
gen chose_initial = (generic_id == first_choice) & (generic == first_generic) & (xr == first_xr)


gen initial_generic_temp = generic if quarter == first_q
egen initial_generic_user = max(initial_generic_temp), by(enrolid)


drop first_choice_temp first_choice



// need name for overlap analysis
merge m:1 generic_id using generic_name_mapping
keep if _merge == 3
drop _merge


// prefix the user's current-quarter attributes with curr_ so they do not
// collide with the entry drug's generic_id / generic / xr brought in below
rename generic_id curr_generic_id
rename gennme curr_gennme
rename generic curr_generic
rename xr curr_xr
drop drugid

// user is in the dataset only based on first use
joinby enrolid indic_id using "user_treatment_indicators_cholesterol_alt_window"


// filter out irrelevant entries
// 1) not either treatment or control for that entry event
keep if treat_alt == 1 | control_alt == 1
// turn the 1/missing indicator into a 0/1 instrument
replace treat_alt = 0 if control_alt == 1

// 2) quarter came before entry quarter
gen entry_quarter = qofd(entry_date)
gen rel_quart = quarter - entry_quarter
//keep if rel_quart >= 0

// A) generate the outcomes
// 1 when the current product is the entering drug (ingredient, generic flag, xr flag)
gen chose_entry_drug = (curr_generic_id == generic_id) & (curr_generic == generic) & (curr_xr == xr)
tab chose_entry_drug


// B) analyze
gen treat_generic = treat_alt * generic
gen treat_xr = treat_alt * xr
// is stickiness bigger for generic choice? (doesn't have to be)


// Does this need correcting?
// endogenous regressor: whether the user's FIRST-quarter product was the entry drug
gen initial = chose_entry_drug if quarter == first_q  // rel_quart == 0 (old code; this is probably cleaner; use their first choice, otherwise some not there in period 0)
egen initial_choice = max(initial), by(enrolid entry_num) // unnecessary? maybe only 1 entry_num per enrolid


// one 2SLS regression per horizon (3, 6, ..., 15 quarters after entry),
// restricted to entry events where the entering drug is not generic
forvalues i=3(3)15 {


	display "Offset: `i' quarters"
	eststo: ivregress 2sls chose_entry_drug (initial_choice=treat_alt) if rel_quart == `i' & generic == 0, r
}

esttab using table_A2_A.tex, con r2 se label replace booktabs keep(initial_choice) title(Regression table \label{tab1})
eststo clear





/*
Panel B: apply the same approach to the generic Zocor entry event
- same as Table 1.do but restricted to one entry event
*/



// ---------------------------------------------------------------------------
// Panel B: core IV analysis restricted to a single entry event
// (entry_num == 12).
//
// Reads : user_quarter_choice_cholesterol, generic_name_mapping,
//         user_treatment_indicators_cholesterol
// Writes: table_A2_B.tex
// ---------------------------------------------------------------------------
set more off
eststo clear
use user_quarter_choice_cholesterol, clear

// restrict to the cholesterol indication
tab indic_id
keep if indic_id == 5

// --- user-level first-quarter attributes -----------------------------------
egen first_q = min(quarter), by(enrolid)

// ingredient chosen in the first quarter (helper variables, dropped below)
gen start_ingr_tmp = generic_id if quarter == first_q
egen start_ingr = max(start_ingr_tmp), by(enrolid)

// generic flag and extended-release flag of the first-quarter product
foreach attr in generic xr {
	gen first_`attr'_temp = `attr' if quarter == first_q
	egen first_`attr' = max(first_`attr'_temp), by(enrolid)
}

// quarters since first use, and whether the current product equals the first one
gen period = quarter - first_q
gen chose_initial = (xr == first_xr) & (generic == first_generic) & (generic_id == start_ingr)

gen initial_generic_temp = generic if quarter == first_q
egen initial_generic_user = max(initial_generic_temp), by(enrolid)

drop start_ingr_tmp start_ingr

// --- attach ingredient names (needed for overlap analysis) -----------------
merge m:1 generic_id using generic_name_mapping, keep(match) nogenerate

// current-quarter attributes get a curr_ prefix; the unprefixed names are
// re-used by the entry drug's attributes after the join
rename (generic_id gennme generic xr) (curr_generic_id curr_gennme curr_generic curr_xr)
drop drugid

// --- attach entry events: a user appears only via first use ----------------
joinby enrolid indic_id using user_treatment_indicators_cholesterol

// keep rows that are treatment or control for the entry event, then make
// treat a 0/1 variable
keep if control == 1 | treat == 1
replace treat = 0 if control == 1

// event time measured in quarters relative to the entry date
gen entry_quarter = qofd(entry_date)
gen rel_quart = quarter - entry_quarter
//keep if rel_quart >= 0

// --- outcome: current product is the entering drug -------------------------
gen chose_entry_drug = (curr_xr == xr) & (curr_generic == generic) & (curr_generic_id == generic_id)
tab chose_entry_drug

// interactions (is stickiness bigger for generic choice? doesn't have to be)
gen treat_generic = generic * treat
gen treat_xr = xr * treat

// endogenous regressor: entry drug chosen in the user's first quarter,
// spread over all rows of the user x entry event
// (rel_quart == 0 was the old condition; first quarter is used because some
// users are not observed in period 0)
gen initial = cond(quarter == first_q, chose_entry_drug, .)
egen initial_choice = max(initial), by(enrolid entry_num)

//////////////////////////
// IV - Breakdown (B/G) //
//////////////////////////

// key line: only this entry event is analysed in Panel B
keep if entry_num == 12

// one robust 2SLS regression per horizon: 3, 6, 9, 12, 15 quarters
foreach horizon of numlist 3(3)15 {
	display "Offset: `horizon' quarters"
	eststo: ivregress 2sls chose_entry_drug (initial_choice = treat) if rel_quart == `horizon', vce(robust)
}

esttab using table_A2_B.tex, con r2 se label replace booktabs keep(initial_choice) title(Regression table \label{tab1})
eststo clear









/*
Panel C
- Alternative time indexing
*/

// ---------------------------------------------------------------------------
// Panel C: core IV analysis with time indexed by patient experience, i.e.
// rel_quart counts quarters since the USER's first quarter rather than
// quarters since the drug's entry date.
//
// Reads : user_quarter_choice_cholesterol, generic_name_mapping,
//         user_treatment_indicators_cholesterol
// Writes: table_A2_C.tex
//
// FIX: the regression previously instrumented with treat_alt and omitted the
// robust option. The sample here is defined by treat / control, treat_alt is
// never recoded to 0 for controls in this panel, and every other panel uses
// -treat- with robust standard errors. The regression now matches them, so
// the only difference from the baseline is the time index.
// NOTE(review): this changes the estimates in table_A2_C.tex -- confirm
// against the intended specification.
// ---------------------------------------------------------------------------
clear
use user_quarter_choice_cholesterol


// focus on cholesterol
tab indic_id
keep if indic_id == 5


// BASIC: LOOK AT IMPACT OF FIRST CHOICE ON LATER CHOICE
// first_q = the user's earliest quarter; *_temp variables are non-missing only
// in that quarter and are then spread to all rows of the user
egen first_q = min(quarter), by(enrolid)
gen first_choice_temp = generic_id if quarter == first_q
egen first_choice = max(first_choice_temp), by(enrolid)

gen first_generic_temp = generic if quarter == first_q
egen first_generic = max(first_generic_temp), by(enrolid)

gen first_xr_temp = xr if quarter == first_q
egen first_xr = max(first_xr_temp), by(enrolid)


gen period = quarter - first_q // KEY CHANGE
gen chose_initial = (generic_id == first_choice) & (generic == first_generic) & (xr == first_xr)


eststo clear




gen initial_generic_temp = generic if quarter == first_q
egen initial_generic_user = max(initial_generic_temp), by(enrolid)


drop first_choice_temp first_choice





/////////////////////////////////////////////
// IDENTIFIED ANALYSIS: USING ENTRY EVENTS //
/////////////////////////////////////////////

// need name for overlap analysis
merge m:1 generic_id using generic_name_mapping
keep if _merge == 3
drop _merge


// curr_ prefix = attributes of the product the user holds in this quarter;
// the unprefixed names are re-used by the entry drug after the join
rename generic_id curr_generic_id
rename gennme curr_gennme
rename generic curr_generic
rename xr curr_xr
drop drugid

// user is in the dataset only based on first use
joinby enrolid indic_id using "user_treatment_indicators_cholesterol"


// filter out irrelevant entries
// 1) not either treatment or control for that entry event
keep if treat == 1 | control == 1
replace treat = 0 if control == 1

// 2) time index
gen entry_quarter = qofd(entry_date)
gen rel_quart = quarter - first_q // NEW & CHANGED (person level period markers)
//keep if rel_quart >= 0

// A) generate the outcomes
// 1 when the current product is the entering drug (ingredient, generic flag, xr flag)
gen chose_entry_drug = (curr_generic_id == generic_id) & (curr_generic == generic) & (curr_xr == xr)
tab chose_entry_drug


// B) analyze
gen treat_generic = treat * generic
gen treat_xr = treat * xr
// is stickiness bigger for generic choice? (doesn't have to be)


// Does this need correcting?
// endogenous regressor: whether the user's first-quarter product was the entry drug
gen initial = chose_entry_drug if quarter == first_q  // rel_quart == 0 (old code; this is probably cleaner; use their first choice, otherwise some not there in period 0)
egen initial_choice = max(initial), by(enrolid entry_num) // unnecessary? maybe only 1 entry_num per enrolid


// one robust 2SLS regression per horizon (3, 6, ..., 15 quarters since the
// user's first quarter), restricted to non-generic entering drugs
forvalues i=3(3)15 {
	display "Offset: `i' quarters"
	eststo: ivregress 2sls chose_entry_drug (initial_choice=treat) if rel_quart == `i' & generic == 0, r
}

esttab using table_A2_C.tex, con r2 se label replace booktabs keep(initial_choice) title(Regression table \label{tab1})
eststo clear





/*
Panel D: Ever used the new drug
- Step 1: create a more detailed panel dataset
- Step 2: analyze the outcome
*/

// ---------------------------------------------------------------------------
// Panel D, step 1: build a user x quarter history of cholesterol drug use
// from the yearly claims files ccaed1996 ... ccaed2013.
//
// Reads : ccaed1996..ccaed2013, compressed_redbook_w_indications,
//         all_enrolid_info
// Writes: user_history_cholesterol_full.dta (one row per enrolid x indic_id x
//         quarter: the drug with the most prescriptions in that quarter)
//
// FIXES
// 1) "First use" and "top prescription of the quarter" were selected with
//    -sort- / -gsort- followed by -egen tag()-. -egen tag()- re-sorts the
//    data internally without -stable-, so the tagged row was not guaranteed
//    to be the first row of the preceding sort. Both selections now use
//    -bysort group (order vars)- with _n == 1.
// 2) Each yearly file is filtered BEFORE it is appended, instead of appending
//    the raw year and re-merging the whole accumulated data set every
//    iteration. The resulting set of rows is the same.
// ---------------------------------------------------------------------------
clear
set more off

tempfile claims

forvalues j=1996/2013 {
	use enrolid genind ndcnum svcdate using ccaed`j', clear

	// attach drug attributes; keep cholesterol (indic_id 5) claims only
	merge m:1 ndcnum using "compressed_redbook_w_indications", keepusing(generic_id indic_id xr) keep(match) nogenerate
	keep if indic_id == 5

	// filter data (keep size down): only users present in all_enrolid_info
	merge m:1 enrolid using "all_enrolid_info", keep(match) keepusing(start_year) nogenerate

	// stack on top of the years processed so far
	if `j' > 1996 {
		append using `claims'
	}
	save `claims', replace
}


// panel has 15 million people (2.5 million in 10 year panel)
// data does match people over years


// generics: genind (type of situation and generic vs. brand)
// -force- turns non-numeric codes into missing; those rows fail the range
// check below and are dropped
destring genind, replace force
keep if genind >= 1 & genind <= 5

gen generic = (genind == 4) | (genind == 5)
gen generic_available = (genind == 3)

drop genind


///////////////
// END SETUP //
///////////////

//keep if indic_id == 5 // start by looking at cholesterol (but code general enough to handle several)

// figure out when a user first starts using anything in the class:
// earliest service date, and brand (generic == 0) first if several on the same day
bysort enrolid indic_id (svcdate generic): gen byte first_use = (_n == 1)

// label new subscription: a first use dated before June 1 of the user's
// start_year is not counted as a new start // MODIFIED
replace first_use = 0 if first_use == 1 & svcdate < mdy(6,1,start_year)


gen year = year(svcdate)
gen quarter = qofd(svcdate)
tab quarter

gen prescriptions = 1
egen drugid = group(generic_id generic xr) // a drug is ingredient x generic x extended release


// one row per user x quarter x drug, with the number of prescriptions
collapse (sum) prescriptions (max) first_use (first) generic_id generic xr year, by(enrolid indic_id quarter drugid) // KEY DIFFERENCE vs. baseline


// patch for dealing with multiple prescriptions (still want to record that
// this quarter was the first): flag every drug row of the first-use quarter
egen first_use_temp = max(first_use), by(enrolid indic_id quarter)
replace first_use = first_use_temp
drop first_use_temp

// problematic step: will drop first use if first use involves two drugs
// keep the top prescription for the quarter; ties are broken by the lowest
// drugid (need to clean this up later)
gen double neg_prescriptions = -prescriptions
bysort enrolid indic_id quarter (neg_prescriptions drugid): keep if _n == 1
drop neg_prescriptions

sort enrolid indic_id quarter

save user_history_cholesterol_full, replace // worries about under sampling early years (could try to over sample them)



// ---------------------------------------------------------------------------
// Panel D, step 2: IV analysis where the outcome is "has EVER chosen the
// entry drug up to this quarter" instead of "chooses it this quarter".
//
// Reads : user_history_cholesterol_full, generic_name_mapping,
//         user_treatment_indicators_cholesterol_full_long
// Writes: table_A2_D.tex
// ---------------------------------------------------------------------------
use user_history_cholesterol_full, clear

// --- first choice of each user ---------------------------------------------
egen first_q = min(quarter), by(enrolid)
gen first_choice_temp = generic_id if quarter == first_q
egen first_choice = max(first_choice_temp), by(enrolid)

// quarters since first use; same ingredient as in the first quarter?
gen period = quarter - first_q
gen chose_initial = (generic_id == first_choice)

// --- attach ingredient names ------------------------------------------------
merge m:1 generic_id using generic_name_mapping, keep(match) nogenerate

// current-quarter attributes get a curr_ prefix; the unprefixed names are
// re-used by the entry drug's attributes after the join
rename (generic_id gennme generic xr) (curr_generic_id curr_gennme curr_generic curr_xr)
drop drugid

// --- attach entry events: a user appears only via first use -----------------
joinby enrolid indic_id using user_treatment_indicators_cholesterol_full_long

// keep treatment / control rows of each entry event and make both
// instruments 0/1
keep if control == 1 | treat == 1
replace treat = 0 if control == 1
replace treat_alt = 0 if control_alt == 1

// event time in quarters relative to the entry date
gen entry_quarter = qofd(entry_date)
gen rel_quart = quarter - entry_quarter

// current product is the entering drug (ingredient, generic flag, xr flag)
gen chose_entry_drug = (curr_xr == xr) & (curr_generic == generic) & (curr_generic_id == generic_id)
tab chose_entry_drug

// interactions (is stickiness bigger for generic choice? doesn't have to be)
gen treat_generic = generic * treat
gen treat_xr = xr * treat

// endogenous regressor: entry drug chosen in the user's first quarter
// (rel_quart == 0 was the old condition; this is probably cleaner)
gen initial = cond(quarter == first_q, chose_entry_drug, .)
egen initial_choice = max(initial), by(enrolid entry_num)

// NEW: any quarter
// Within each user x entry event, ordered by quarter, the indicator switches
// to 1 in the first quarter the entry drug is chosen and stays 1 afterwards.
// Some users are counted a couple of times (for different entries).
bysort enrolid entry_num (quarter): gen ever_entry_drug = (sum(chose_entry_drug) > 0)

// 1 marks quarters where the user chose the entry drug earlier but not now
gen diff_entry = ever_entry_drug - chose_entry_drug
tab diff_entry

// one robust 2SLS regression per horizon (3, 6, ..., 15 quarters after
// entry), restricted to non-generic entering drugs
foreach horizon of numlist 3(3)15 {
	display "Offset: `horizon' quarters"
	eststo: ivregress 2sls ever_entry_drug (initial_choice = treat) if rel_quart == `horizon' & generic == 0, vce(robust)
}

esttab using table_A2_D.tex, con r2 se label replace booktabs keep(initial_choice) title(Regression table \label{tab1})
eststo clear




